{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import json\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "pd.set_option('display.max_colwidth', 500)\n",
    "\n",
    "CSV_PATH = '../tpch_with_cost_requests'\n",
    "BENCHMARK = 'tpch'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlgorithmStep():\n",
    "    def __init__(self, indexes, number_of_indexes, step, memory_consumption, workload_processing_cost):\n",
    "        self.indexes = indexes\n",
    "        self.number_of_indexes = number_of_indexes\n",
    "        self.step = step\n",
    "        self.memory_consumption = memory_consumption\n",
    "        self.workload_processing_cost = workload_processing_cost\n",
    "        \n",
    "    def memory_consumption_gb(self):\n",
    "        return f\"{self.memory_consumption / 1E9:.2f}\"\n",
    "    \n",
    "    def __repr__(self):\n",
    "        return f\"At step: {self.step} with {self.number_of_indexes} indexes ({self.memory_consumption_gb()} GB):\\nCost: {self.workload_processing_cost}\\nIndexes: {self.indexes}\"\n",
    "        \n",
    "def calculate_cost(columns, row):\n",
    "    cost = 0\n",
    "    for column in columns:\n",
    "        if column[0] == 'q':\n",
    "            cost += float(json.loads(row[column])['Cost'])\n",
    "    \n",
    "    return cost\n",
    "\n",
    "def shorten_tbl_name(index_name):\n",
    "    # CAREFUL THIS COMPLETELY REMOVES nation\n",
    "    if 'nation.' in index_name:\n",
    "        return None\n",
    "    return index_name.replace('nation.', 'n.').replace('customer.', 'c.').replace('lineitem.', 'l.').replace('partsupp.', 'ps.').replace('orders.', 'o.').replace('supplier.', 's.')\n",
    "\n",
    "def cut_index_string(indexes):\n",
    "    result = []\n",
    "    removed_brackets = indexes[1:-1]\n",
    "    for split_1 in removed_brackets.split('I('):\n",
    "        without_brace = split_1.split(')')[0]\n",
    "        if without_brace == '':\n",
    "            continue\n",
    "        index_name = f\"[{without_brace.replace('C ', '')}]\"\n",
    "        index_name = shorten_tbl_name(index_name)\n",
    "        if index_name is None:\n",
    "            continue\n",
    "        result.append(index_name)\n",
    "        \n",
    "    return result\n",
    "\n",
    "def get_new_and_removed_indexes_str(current_indexes, previous_indexes):\n",
    "    removed_indexes = previous_indexes - current_indexes\n",
    "    new_indexes = current_indexes - previous_indexes\n",
    "    \n",
    "    new_index_str = '\\n'.join(new_indexes) if len(new_indexes) else '-'\n",
    "    removed_index_str = '\\n'.join(removed_indexes) if len(removed_indexes) else '-'\n",
    "    \n",
    "    return new_index_str, removed_index_str\n",
    "\n",
    "def get_cost(columns, row, previous_cost, initial_cost):\n",
    "    cost = calculate_cost(columns, row)\n",
    "    cost_relative_to_previous = cost / previous_cost * 100\n",
    "    cost_relative_to_initial = cost / initial_cost * 100\n",
    "    cost_string = f'{cost_relative_to_previous:.2f}% / {cost_relative_to_initial:.2f}%'\n",
    "    \n",
    "    return cost_string, cost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initial cost: 94942316.34\n"
     ]
    }
   ],
   "source": [
    "df = pd.read_csv(f'{CSV_PATH}/results_no_index_{BENCHMARK}_22_queries.csv', sep=';')\n",
    "df = df.drop(['q20'],axis=1)\n",
    "initial_cost = calculate_cost(df.columns, df.iloc[0])\n",
    "print(f'Initial cost: {initial_cost}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Budget Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Max Budget (GB)</th>\n",
       "      <th>Metric</th>\n",
       "      <th>drop</th>\n",
       "      <th>epic</th>\n",
       "      <th>ibm</th>\n",
       "      <th>microsoft</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.2</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[ps.ps_suppkey]</td>\n",
       "      <td>[part.p_partkey,part.p_name]<br>[s.s_name]<br>[s.s_suppkey,s.s_nationkey]<br>[s.s_suppkey,s.s_name]<br>[s.s_nationkey,s.s_suppkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>-</td>\n",
       "      <td>98.78% / 98.78%</td>\n",
       "      <td>98.91% / 98.91%</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.4</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_partkey]</td>\n",
       "      <td>[ps.ps_suppkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>-</td>\n",
       "      <td>35.70% / 35.26%</td>\n",
       "      <td>99.21% / 98.13%</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.8</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_partkey,l.l_suppkey]</td>\n",
       "      <td>[c.c_custkey]<br>[part.p_size]<br>[part.p_partkey,part.p_container]<br>[c.c_mktsegment,c.c_custkey]<br>[part.p_container,part.p_partkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_partkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>-</td>\n",
       "      <td>98.04% / 34.57%</td>\n",
       "      <td>99.82% / 97.95%</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1.2</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[part.p_type,part.p_partkey]<br>[ps.ps_partkey]<br>[c.c_nationkey,c.c_custkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>-</td>\n",
       "      <td>74.41% / 25.73%</td>\n",
       "      <td>98.35% / 96.33%</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[ps.ps_partkey]<br>[l.l_partkey]</td>\n",
       "      <td>[ps.ps_partkey]</td>\n",
       "      <td>[ps.ps_supplycost,ps.ps_partkey]<br>[o.o_orderstatus,o.o_orderkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[part.p_type,part.p_partkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>96.06% / 96.06%</td>\n",
       "      <td>93.75% / 24.12%</td>\n",
       "      <td>99.72% / 96.06%</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2.4</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_returnflag]</td>\n",
       "      <td>[part.p_type,part.p_partkey]<br>[c.c_custkey,c.c_nationkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>99.95% / 96.01%</td>\n",
       "      <td>94.76% / 22.86%</td>\n",
       "      <td>99.95% / 96.01%</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>3.2</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_shipmode]</td>\n",
       "      <td>[l.l_partkey,l.l_suppkey]</td>\n",
       "      <td>[l.l_partkey,l.l_suppkey]<br>[ps.ps_suppkey,ps.ps_partkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[c.c_custkey,c.c_nationkey]<br>[o.o_orderstatus,o.o_orderkey]<br>[ps.ps_supplycost,ps.ps_partkey]<br>[part.p_partkey,part.p_container]<br>[c.c_nationkey,c.c_custkey]<br>[part.p_type,part.p_partkey]<br>[ps.ps_partkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>34.83% / 33.44%</td>\n",
       "      <td>94.20% / 21.53%</td>\n",
       "      <td>34.83% / 33.44%</td>\n",
       "      <td>33.44% / 33.44%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>4</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_suppkey]</td>\n",
       "      <td>[o.o_orderkey]</td>\n",
       "      <td>[c.c_custkey,c.c_nationkey]<br>[ps.ps_supplycost,ps.ps_partkey]<br>[part.p_partkey,part.p_container]<br>[c.c_nationkey,c.c_custkey]<br>[ps.ps_partkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>95.07% / 31.79%</td>\n",
       "      <td>95.65% / 20.59%</td>\n",
       "      <td>95.07% / 31.79%</td>\n",
       "      <td>95.07% / 31.79%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>5</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[o.o_custkey]</td>\n",
       "      <td>[part.p_type,part.p_partkey]<br>[o.o_orderstatus,o.o_orderkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>100.47% / 31.94%</td>\n",
       "      <td>96.77% / 19.93%</td>\n",
       "      <td>100.47% / 31.94%</td>\n",
       "      <td>100.47% / 31.94%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>6.4</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[c.c_custkey]</td>\n",
       "      <td>[l.l_partkey,l.l_quantity]</td>\n",
       "      <td>[l.l_suppkey,l.l_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[ps.ps_supplycost,ps.ps_partkey]<br>[o.o_orderstatus,o.o_orderkey]</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>100.98% / 32.25%</td>\n",
       "      <td>97.92% / 19.51%</td>\n",
       "      <td>100.98% / 32.25%</td>\n",
       "      <td>100.98% / 32.25%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>8</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_shipmode]</td>\n",
       "      <td>[l.l_orderkey,l.l_quantity]</td>\n",
       "      <td>[o.o_orderkey,o.o_orderdate]<br>[o.o_orderstatus,o.o_orderkey]<br>[ps.ps_supplycost,ps.ps_partkey]</td>\n",
       "      <td>[l.l_orderkey,l.l_returnflag]<br>[l.l_orderkey,l.l_quantity]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_suppkey,l.l_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>96.12% / 31.00%</td>\n",
       "      <td>97.73% / 19.07%</td>\n",
       "      <td>96.12% / 31.00%</td>\n",
       "      <td>96.12% / 31.00%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>10</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[c.c_custkey]<br>[part.p_brand]<br>[o.o_orderkey]<br>[ps.ps_suppkey]<br>[s.s_suppkey]<br>[l.l_returnflag]<br>[part.p_partkey]<br>[o.o_orderstatus]<br>[o.o_custkey]</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[ps.ps_partkey,ps.ps_supplycost]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>71.54% / 22.18%</td>\n",
       "      <td>97.55% / 18.60%</td>\n",
       "      <td>71.54% / 22.18%</td>\n",
       "      <td>71.54% / 22.18%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>12.8</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[part.p_size]<br>[l.l_shipdate]<br>[ps.ps_supplycost]<br>[c.c_mktsegment]</td>\n",
       "      <td>[ps.ps_partkey,ps.ps_supplycost]</td>\n",
       "      <td>[o.o_custkey,o.o_orderdate]<br>[l.l_returnflag]<br>[o.o_orderkey,o.o_orderpriority]</td>\n",
       "      <td>[o.o_orderkey,o.o_orderdate]<br>[c.c_custkey]<br>[ps.ps_suppkey]<br>[s.s_suppkey]<br>[l.l_shipmode]<br>[o.o_orderstatus,o.o_orderkey]<br>[part.p_partkey,part.p_type]<br>[l.l_returnflag]<br>[l.l_orderkey]<br>[o.o_custkey,o.o_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[ps.ps_partkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_orderkey,l.l_returnflag]<br>[ps.ps_suppkey,ps.ps_partkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>89.75% / 19.90%</td>\n",
       "      <td>98.04% / 18.24%</td>\n",
       "      <td>89.75% / 19.90%</td>\n",
       "      <td>89.75% / 19.90%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "algorithm_names = sorted(['epic', 'ibm', 'drop', 'microsoft'])\n",
    "df_table = pd.DataFrame(columns=['Max Budget (GB)', 'Metric'] + algorithm_names)\n",
    "metrics = ['New Indexes', 'Removed Indexes', '% of previous / initial cost']\n",
    "last_steps_indexes_per_algo = {}\n",
    "previous_cost = {}\n",
    "for algorithm_name in algorithm_names:\n",
    "    last_steps_indexes_per_algo[algorithm_name] = set()\n",
    "    previous_cost[algorithm_name] = initial_cost\n",
    "    \n",
    "df_data_ibm = pd.read_csv(f'{CSV_PATH}/results_ibm-max2_{BENCHMARK}_22_queries.csv',sep=';')\n",
    "df_data_ibm = df_data_ibm.drop(['q20'],axis=1)\n",
    "df_data_epic = pd.read_csv(f'{CSV_PATH}/results_epic_max_indexes_{BENCHMARK}_22_queries.csv',sep=';')\n",
    "df_data_epic = df_data_epic.drop(['q20'],axis=1)\n",
    "df_data_drop = pd.read_csv(f'{CSV_PATH}/results_drop_heuristic_{BENCHMARK}_22_queries.csv',sep=';')\n",
    "df_data_drop = df_data_drop.drop(['q20'],axis=1)\n",
    "df_data_ms = pd.read_csv(f'{CSV_PATH}/results_microsoft-max2_{BENCHMARK}_22_queries.csv',sep=';')\n",
    "df_data_ms = df_data_ms.drop(['q20'],axis=1)\n",
    "\n",
    "\n",
    "for (_, row_epic), (__, row_ibm) in zip(df_data_epic.iterrows(), df_data_ibm.iterrows()):\n",
    "    budget = json.loads(row_ibm['parameters'])['budget']\n",
    "    budget_bytes = budget * 1000 * 1000\n",
    "    for idx, metric in enumerate(metrics):\n",
    "        df_table = df_table.append(pd.DataFrame([[budget / 1000 if idx == 0 else '', metric] + len(algorithm_names) * ['-']], columns=df_table.columns),ignore_index=True)\n",
    "    \n",
    "    # EPIC\n",
    "    current_indexes = set(cut_index_string(row_epic['indexed columns']))    \n",
    "    new_index_str, removed_index_str = get_new_and_removed_indexes_str(current_indexes, last_steps_indexes_per_algo['epic'])\n",
    "    df_table['epic'].values[-3] = new_index_str\n",
    "    df_table['epic'].values[-2] = removed_index_str\n",
    "    last_steps_indexes_per_algo['epic'] = current_indexes\n",
    "    \n",
    "    cost_string, cost = get_cost(df_data_epic.columns, row_epic, previous_cost['epic'], initial_cost)\n",
    "    previous_cost['epic'] = cost\n",
    "    df_table['epic'].values[-1] = cost_string\n",
    "    \n",
    "    # IBM\n",
    "    current_indexes = set(cut_index_string(row_ibm['indexed columns']))    \n",
    "    new_index_str, removed_index_str = get_new_and_removed_indexes_str(current_indexes, last_steps_indexes_per_algo['ibm'])\n",
    "    df_table['ibm'].values[-3] = new_index_str\n",
    "    df_table['ibm'].values[-2] = removed_index_str\n",
    "    last_steps_indexes_per_algo['ibm'] = current_indexes\n",
    "    \n",
    "    cost_string, cost = get_cost(df_data_ibm.columns, row_ibm, previous_cost['ibm'], initial_cost)\n",
    "    previous_cost['ibm'] = cost\n",
    "    df_table['ibm'].values[-1] = cost_string\n",
    "    \n",
    "    #DROP\n",
    "    relevant = df_data_drop[(df_data_drop['memory consumption'] < budget_bytes)].tail(1)\n",
    "    if relevant.shape[0] > 0:\n",
    "        row_drop = relevant.iloc[0]\n",
    "        current_indexes = set(cut_index_string(row_drop['indexed columns']))\n",
    "        new_index_str, removed_index_str = get_new_and_removed_indexes_str(current_indexes, last_steps_indexes_per_algo['drop'])\n",
    "        df_table['drop'].values[-3] = new_index_str\n",
    "        df_table['drop'].values[-2] = removed_index_str\n",
    "        last_steps_indexes_per_algo['drop'] = current_indexes\n",
    "    \n",
    "        cost_string, cost = get_cost(df_data_ibm.columns, row_ibm, previous_cost['drop'], initial_cost)\n",
    "        previous_cost['drop'] = cost\n",
    "        df_table['drop'].values[-1] = cost_string\n",
    "        \n",
    "    #MS\n",
    "    relevant = df_data_ms[(df_data_ms['memory consumption'] < budget_bytes)].tail(1)\n",
    "    if relevant.shape[0] > 0:\n",
    "        row_ms = relevant.iloc[0]\n",
    "        current_indexes = set(cut_index_string(row_ms['indexed columns']))\n",
    "        new_index_str, removed_index_str = get_new_and_removed_indexes_str(current_indexes, last_steps_indexes_per_algo['microsoft'])\n",
    "        df_table['microsoft'].values[-3] = new_index_str\n",
    "        df_table['microsoft'].values[-2] = removed_index_str\n",
    "        last_steps_indexes_per_algo['microsoft'] = current_indexes\n",
    "    \n",
    "        cost_string, cost = get_cost(df_data_ibm.columns, row_ibm, previous_cost['microsoft'], initial_cost)\n",
    "        previous_cost['microsoft'] = cost\n",
    "        df_table['microsoft'].values[-1] = cost_string\n",
    "    \n",
    "    \n",
    "        \n",
    "def pretty_print(df):\n",
    "    return display( HTML( df.to_html().replace(\"\\\\n\",\"<br>\") ) )\n",
    "new_df = pretty_print(df_table)\n",
    "new_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algorithm_names = sorted(['epic_max_indexes', 'microsoft-max2', 'ibm_max_indexes', 'drop_heuristic'])\n",
    "# algorithm_names = sorted(['epic_max_indexes', 'ibm_max_indexes', 'drop_heuristic'])\n",
    "\n",
    "algorithm_steps = {}\n",
    "\n",
    "for algorithm_name in algorithm_names:\n",
    "    path = f'{CSV_PATH}/results_{algorithm_name}_{BENCHMARK}_22_queries.csv'\n",
    "    df = pd.read_csv(path,sep=';')\n",
    "    df = df.drop(['q20'],axis=1)\n",
    "    algorithm_steps[algorithm_name] = []\n",
    "    for step, row in df.iterrows():\n",
    "        cost = calculate_cost(df.columns, row)\n",
    "        ast = AlgorithmStep(row['indexed columns'], row['#indexes'], step, row['memory consumption'], cost)\n",
    "        algorithm_steps[algorithm_name].append(ast)\n",
    "\n",
    "#     print(algorithm_name)\n",
    "#     for step in algorithm_steps[algorithm_name]:\n",
    "#         print(step)\n",
    "#         print(f'Cost relative to no indexes: {step.workload_processing_cost / initial_cost * 100}%')\n",
    "#         print()\n",
    "#     print()\n",
    "#     print()\n",
    "#     print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Step #</th>\n",
       "      <th>Metric</th>\n",
       "      <th>drop_heuristic</th>\n",
       "      <th>epic_max_indexes</th>\n",
       "      <th>ibm_max_indexes</th>\n",
       "      <th>microsoft-max2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_partkey]</td>\n",
       "      <td>[ps.ps_suppkey]</td>\n",
       "      <td>[s.s_name]</td>\n",
       "      <td>[l.l_partkey,l.l_suppkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>36.43% / 36.43%</td>\n",
       "      <td>98.78% / 98.78%</td>\n",
       "      <td>100.03% / 100.03%</td>\n",
       "      <td>35.57% / 35.57%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>1.57</td>\n",
       "      <td>0.21</td>\n",
       "      <td>0.01</td>\n",
       "      <td>2.51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[ps.ps_partkey]</td>\n",
       "      <td>[l.l_partkey]</td>\n",
       "      <td>[part.p_partkey,part.p_name]</td>\n",
       "      <td>[ps.ps_suppkey,ps.ps_partkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>92.91% / 33.85%</td>\n",
       "      <td>35.70% / 35.26%</td>\n",
       "      <td>98.96% / 98.99%</td>\n",
       "      <td>94.29% / 33.54%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>1.78</td>\n",
       "      <td>1.78</td>\n",
       "      <td>0.16</td>\n",
       "      <td>2.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>3</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_suppkey]</td>\n",
       "      <td>[l.l_partkey,l.l_suppkey]</td>\n",
       "      <td>[ps.ps_suppkey,ps.ps_availqty,ps.ps_partkey]</td>\n",
       "      <td>[l.l_suppkey,l.l_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_partkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>100.00% / 33.85%</td>\n",
       "      <td>98.04% / 34.57%</td>\n",
       "      <td>99.03% / 98.03%</td>\n",
       "      <td>96.31% / 32.30%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>3.34</td>\n",
       "      <td>2.72</td>\n",
       "      <td>0.71</td>\n",
       "      <td>5.35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>4</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[l.l_suppkey,l.l_partkey,l.l_shipdate]</td>\n",
       "      <td>[l.l_orderkey,l.l_quantity]<br>[l.l_orderkey,l.l_returnflag]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_suppkey,l.l_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>73.98% / 25.04%</td>\n",
       "      <td>74.41% / 25.73%</td>\n",
       "      <td>99.32% / 97.36%</td>\n",
       "      <td>72.64% / 23.46%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>4.91</td>\n",
       "      <td>4.28</td>\n",
       "      <td>4.78</td>\n",
       "      <td>7.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>5</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_shipmode]</td>\n",
       "      <td>[ps.ps_partkey]</td>\n",
       "      <td>[n.n_name,n.n_nationkey]</td>\n",
       "      <td>[ps.ps_partkey,ps.ps_supplycost]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>94.70% / 23.71%</td>\n",
       "      <td>93.75% / 24.12%</td>\n",
       "      <td>100.00% / 97.36%</td>\n",
       "      <td>95.02% / 22.29%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>7.03</td>\n",
       "      <td>4.49</td>\n",
       "      <td>4.78</td>\n",
       "      <td>8.13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>6</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_returnflag]</td>\n",
       "      <td>[l.l_returnflag]</td>\n",
       "      <td>[n.n_nationkey,n.n_name]</td>\n",
       "      <td>[l.l_shipmode]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>94.67% / 22.45%</td>\n",
       "      <td>94.76% / 22.86%</td>\n",
       "      <td>100.00% / 97.36%</td>\n",
       "      <td>94.05% / 20.97%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>8.44</td>\n",
       "      <td>5.90</td>\n",
       "      <td>4.78</td>\n",
       "      <td>10.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>7</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[ps.ps_suppkey]</td>\n",
       "      <td>[l.l_shipmode]</td>\n",
       "      <td>[n.n_nationkey,n.n_name,n.n_regionkey]</td>\n",
       "      <td>[l.l_returnflag]<br>[l.l_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_orderkey,l.l_returnflag]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>98.38% / 22.09%</td>\n",
       "      <td>94.20% / 21.53%</td>\n",
       "      <td>100.00% / 97.36%</td>\n",
       "      <td>95.65% / 20.06%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>8.65</td>\n",
       "      <td>8.02</td>\n",
       "      <td>4.78</td>\n",
       "      <td>10.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[o.o_orderkey]</td>\n",
       "      <td>[o.o_orderkey]</td>\n",
       "      <td>[n.n_nationkey,n.n_regionkey,n.n_name]</td>\n",
       "      <td>[o.o_orderkey,o.o_orderdate]<br>[o.o_custkey,o.o_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>95.75% / 21.15%</td>\n",
       "      <td>95.65% / 20.59%</td>\n",
       "      <td>100.00% / 97.36%</td>\n",
       "      <td>90.95% / 18.24%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.04</td>\n",
       "      <td>8.41</td>\n",
       "      <td>4.78</td>\n",
       "      <td>12.13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>9</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[o.o_custkey]</td>\n",
       "      <td>[o.o_custkey]</td>\n",
       "      <td>[n.n_name,n.n_nationkey,n.n_regionkey]</td>\n",
       "      <td>[c.c_custkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>96.85% / 20.48%</td>\n",
       "      <td>96.77% / 19.93%</td>\n",
       "      <td>100.00% / 97.36%</td>\n",
       "      <td>97.93% / 17.86%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.43</td>\n",
       "      <td>8.80</td>\n",
       "      <td>4.78</td>\n",
       "      <td>12.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>10</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[c.c_custkey]</td>\n",
       "      <td>[c.c_custkey]</td>\n",
       "      <td>[s.s_suppkey]</td>\n",
       "      <td>[s.s_suppkey,s.s_nationkey]<br>[ps.ps_suppkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[ps.ps_suppkey,ps.ps_partkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>98.05% / 20.08%</td>\n",
       "      <td>97.92% / 19.51%</td>\n",
       "      <td>99.98% / 97.34%</td>\n",
       "      <td>99.76% / 17.82%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.47</td>\n",
       "      <td>8.84</td>\n",
       "      <td>4.79</td>\n",
       "      <td>12.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>11</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[s.s_suppkey]</td>\n",
       "      <td>[l.l_orderkey,l.l_quantity]</td>\n",
       "      <td>[s.s_suppkey,s.s_nationkey,s.s_name]</td>\n",
       "      <td>[o.o_orderstatus,o.o_orderkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>98.41% / 19.76%</td>\n",
       "      <td>97.73% / 19.07%</td>\n",
       "      <td>99.99% / 97.33%</td>\n",
       "      <td>98.27% / 17.51%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.47</td>\n",
       "      <td>9.86</td>\n",
       "      <td>4.80</td>\n",
       "      <td>12.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>12</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[o.o_orderstatus]</td>\n",
       "      <td>[l.l_orderkey]</td>\n",
       "      <td>[l.l_partkey,l.l_quantity,l.l_extendedprice]</td>\n",
       "      <td>[part.p_partkey,part.p_type]<br>[s.s_suppkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>[s.s_suppkey,s.s_nationkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>98.44% / 19.45%</td>\n",
       "      <td>97.55% / 18.60%</td>\n",
       "      <td>34.81% / 33.88%</td>\n",
       "      <td>99.05% / 17.34%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.83</td>\n",
       "      <td>11.43</td>\n",
       "      <td>4.97</td>\n",
       "      <td>12.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>13</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[part.p_partkey]</td>\n",
       "      <td>[ps.ps_partkey,ps.ps_supplycost]</td>\n",
       "      <td>[part.p_container,part.p_partkey,part.p_brand]</td>\n",
       "      <td>[l.l_shipdate]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[ps.ps_partkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>99.44% / 19.35%</td>\n",
       "      <td>98.04% / 18.24%</td>\n",
       "      <td>99.65% / 33.76%</td>\n",
       "      <td>99.69% / 17.29%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.88</td>\n",
       "      <td>11.57</td>\n",
       "      <td>5.14</td>\n",
       "      <td>14.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>14</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[part.p_brand]</td>\n",
       "      <td>[o.o_orderstatus]</td>\n",
       "      <td>[s.s_suppkey,s.s_address,s.s_nationkey]</td>\n",
       "      <td>[part.p_brand,part.p_size]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>99.73% / 19.29%</td>\n",
       "      <td>98.31% / 17.93%</td>\n",
       "      <td>100.00% / 33.76%</td>\n",
       "      <td>99.70% / 17.24%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>9.95</td>\n",
       "      <td>11.93</td>\n",
       "      <td>5.15</td>\n",
       "      <td>14.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>15</td>\n",
       "      <td>New Indexes</td>\n",
       "      <td>[l.l_shipdate]</td>\n",
       "      <td>[o.o_custkey,o.o_orderkey]</td>\n",
       "      <td>[part.p_size]</td>\n",
       "      <td>[c.c_mktsegment,c.c_custkey]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td></td>\n",
       "      <td>Removed Indexes</td>\n",
       "      <td>-</td>\n",
       "      <td>[o.o_custkey]</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td></td>\n",
       "      <td>% of previous / initial cost</td>\n",
       "      <td>99.73% / 19.24%</td>\n",
       "      <td>98.99% / 17.75%</td>\n",
       "      <td>99.99% / 33.76%</td>\n",
       "      <td>99.75% / 17.19%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td></td>\n",
       "      <td>Total Size (GB)</td>\n",
       "      <td>11.52</td>\n",
       "      <td>12.16</td>\n",
       "      <td>5.20</td>\n",
       "      <td>14.51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df = pd.DataFrame(columns=['Step #', 'Metric'] + algorithm_names)\n",
    "metrics = ['New Indexes', 'Removed Indexes', '% of previous / initial cost', 'Total Size (GB)']\n",
    "most_steps = max(list(map(lambda x: len(algorithm_steps[x]), algorithm_steps)))\n",
    "last_steps_indexes_per_algo = {}\n",
    "\n",
    "previous_cost = {}\n",
    "for algorithm_name in algorithm_names:\n",
    "    previous_cost[algorithm_name] = initial_cost\n",
    "\n",
    "for step in range(0, most_steps):\n",
    "    for idx, metric in enumerate(metrics):\n",
    "        df = df.append(pd.DataFrame([[step + 1 if idx == 0 else '', metric] + len(algorithm_names) * ['-']], columns=df.columns),ignore_index=True)\n",
    "    for algorithm_name in algorithm_names:\n",
    "        if algorithm_name not in last_steps_indexes_per_algo:\n",
    "            last_steps_indexes_per_algo[algorithm_name] = set()\n",
    "        if step < len(algorithm_steps[algorithm_name]):\n",
    "            algorithm_step = algorithm_steps[algorithm_name][step]\n",
    "            df[algorithm_name].values[-1] = algorithm_step.memory_consumption_gb()\n",
    "            cost_relative_to_previous = algorithm_step.workload_processing_cost / previous_cost[algorithm_name] * 100\n",
    "            previous_cost[algorithm_name] = algorithm_step.workload_processing_cost\n",
    "            cost_relative_to_initial = algorithm_step.workload_processing_cost / initial_cost * 100\n",
    "            cost_string = f'{algorithm_step.workload_processing_cost} ({cost_relative_to_previous:.2f}%)'\n",
    "            cost_string = f'{cost_relative_to_previous:.2f}% / {cost_relative_to_initial:.2f}%'\n",
    "            df[algorithm_name].values[-2] = cost_string\n",
    "            \n",
    "            indexes = set(cut_index_string(algorithm_step.indexes))\n",
    "            removed_indexes = last_steps_indexes_per_algo[algorithm_name] - indexes\n",
    "            new_indexes = indexes - last_steps_indexes_per_algo[algorithm_name]\n",
    "            last_steps_indexes_per_algo[algorithm_name] = indexes\n",
    "#             if 'drop' in algorithm_name:\n",
    "#                 print(indexes)\n",
    "#                 print(new_index)\n",
    "#                 print()\n",
    "#             assert len(new_index) == 1, f'There should be exactly one new index: {step} {algorithm_name}'\n",
    "#             already_seen_indexes_per_algo[algorithm_name] |= (new_index)\n",
    "            new_index_str = '\\n'.join(new_indexes) if len(new_indexes) else '-'\n",
    "#             new_index_str = next(iter(new_index))\n",
    "#             new_index_str = new_index_str.replace(',', '\\n')\n",
    "            df[algorithm_name].values[-4] = new_index_str\n",
    "            removed_index_str = '\\n'.join(removed_indexes) if len(removed_indexes) else '-'\n",
    "#             removed_index_str = removed_index_str.replace(',', '\\n')\n",
    "            df[algorithm_name].values[-3] = removed_index_str\n",
    "            \n",
    "            \n",
    "def pretty_print(df):\n",
    "    return display( HTML( df.to_html().replace(\"\\\\n\",\"<br>\") ) )\n",
    "new_df = pretty_print(df.head(15 * len(metrics)))\n",
    "new_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import html2markdown\n",
    "html2markdown.convert(df.to_html(index=False).replace('\\n', ''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llllll}\n",
      "\\toprule\n",
      "Max Budget (GB) &                        Metric &                                                                                                                                                 drop &                                                     epic &                                                                                                                                                                                                           ibm &                                                                                                                                                                                                                microsoft \\\\\n",
      "\\midrule\n",
      "            0.2 &                   New Indexes &                                                                                                                                                    - &                                                        - &                                                                                    [s.s\\_nationkey,s.s\\_suppkey] [part.p\\_partkey,part.p\\_name] [s.s\\_suppkey,s.s\\_name] [s.s\\_suppkey,s.s\\_nationkey] [s.s\\_name] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                                    - &                                          98.01\\% / 98.01\\% &                                                                                                                                                                                               98.91\\% / 98.91\\% &                                                                                                                                                                                                                        - \\\\\n",
      "            0.4 &                   New Indexes &                                                                                                                                                    - &                                          [ps.ps\\_suppkey] &                                                                                                                                                                                               [ps.ps\\_suppkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                                    - &                                          98.74\\% / 96.77\\% &                                                                                                                                                                                               99.21\\% / 98.13\\% &                                                                                                                                                                                                                        - \\\\\n",
      "            0.8 &                   New Indexes &                                                                                                                                                    - &                                                        - &                                                                              [part.p\\_partkey,part.p\\_container] [part.p\\_size] [part.p\\_container,part.p\\_partkey] [c.c\\_custkey] [c.c\\_mktsegment,c.c\\_custkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                                    - &                                         100.00\\% / 96.77\\% &                                                                                                                                                                                               99.82\\% / 97.95\\% &                                                                                                                                                                                                                        - \\\\\n",
      "            1.2 &                   New Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                    [part.p\\_type,part.p\\_partkey] [ps.ps\\_partkey] [c.c\\_nationkey,c.c\\_custkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                                    - &                                         100.00\\% / 96.77\\% &                                                                                                                                                                                               98.35\\% / 96.33\\% &                                                                                                                                                                                                                        - \\\\\n",
      "              2 &                   New Indexes &                                                                                                                       [l.l\\_partkey] [ps.ps\\_partkey] &             [l.l\\_partkey] [ps.ps\\_suppkey,ps.ps\\_partkey] &                                                                                                                                              [o.o\\_orderstatus,o.o\\_orderkey] [ps.ps\\_supplycost,ps.ps\\_partkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                          [ps.ps\\_suppkey] &                                                                                                                                                                                  [part.p\\_type,part.p\\_partkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      96.06\\% / 96.06\\% &                                          35.54\\% / 34.39\\% &                                                                                                                                                                                               99.72\\% / 96.06\\% &                                                                                                                                                                                                                        - \\\\\n",
      "            2.4 &                   New Indexes &                                                                                                                                                    - &                                          [ps.ps\\_partkey] &                                                                                                                                                     [part.p\\_type,part.p\\_partkey] [c.c\\_custkey,c.c\\_nationkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      99.95\\% / 96.01\\% &                                          97.45\\% / 33.52\\% &                                                                                                                                                                                               99.95\\% / 96.01\\% &                                                                                                                                                                                                                        - \\\\\n",
      "            3.2 &                   New Indexes &                                                                                                                                                    - &                                           [o.o\\_orderkey] &                                                                                                                                                                                     [l.l\\_partkey,l.l\\_suppkey] &                                                                                                                                                                 [ps.ps\\_suppkey,ps.ps\\_partkey] [l.l\\_partkey,l.l\\_suppkey] \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &  [part.p\\_partkey,part.p\\_container] [part.p\\_type,part.p\\_partkey] [ps.ps\\_supplycost,ps.ps\\_partkey] [ps.ps\\_partkey] [c.c\\_nationkey,c.c\\_custkey] [c.c\\_custkey,c.c\\_nationkey] [o.o\\_orderstatus,o.o\\_orderkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      34.83\\% / 33.44\\% &                                          96.29\\% / 32.27\\% &                                                                                                                                                                                               34.83\\% / 33.44\\% &                                                                                                                                                                                                          33.44\\% / 33.44\\% \\\\\n",
      "              4 &                   New Indexes &                                                                                                                                        [l.l\\_suppkey] &                                         [l.l\\_returnflag] &                                                                [part.p\\_partkey,part.p\\_container] [ps.ps\\_supplycost,ps.ps\\_partkey] [ps.ps\\_partkey] [c.c\\_nationkey,c.c\\_custkey] [c.c\\_custkey,c.c\\_nationkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      95.07\\% / 31.79\\% &                                          96.09\\% / 31.01\\% &                                                                                                                                                                                               95.07\\% / 31.79\\% &                                                                                                                                                                                                          95.07\\% / 31.79\\% \\\\\n",
      "              5 &                   New Indexes &                                                                                                                                       [l.l\\_orderkey] &  [o.o\\_custkey] [l.l\\_partkey,l.l\\_suppkey] [c.c\\_custkey] &                                                                                                                                                  [part.p\\_type,part.p\\_partkey] [o.o\\_orderstatus,o.o\\_orderkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                          [l.l\\_partkey] [l.l\\_returnflag] &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                     100.47\\% / 31.94\\% &                                          99.05\\% / 30.72\\% &                                                                                                                                                                                              100.47\\% / 31.94\\% &                                                                                                                                                                                                         100.47\\% / 31.94\\% \\\\\n",
      "            6.4 &                   New Indexes &                                                                                                                                                    - &                                         [l.l\\_returnflag] &                                                                                                                                                                                    [l.l\\_partkey,l.l\\_quantity] &                                                                                                                                                                                               [l.l\\_suppkey,l.l\\_orderkey] \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                              [o.o\\_orderstatus,o.o\\_orderkey] [ps.ps\\_supplycost,ps.ps\\_partkey] &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                     100.98\\% / 32.25\\% &                                          95.88\\% / 29.45\\% &                                                                                                                                                                                              100.98\\% / 32.25\\% &                                                                                                                                                                                                         100.98\\% / 32.25\\% \\\\\n",
      "              8 &                   New Indexes &                                                                                                                                       [l.l\\_shipmode] &         [s.s\\_suppkey] [l.l\\_orderkey] [o.o\\_orderstatus] &                                                                                                                [o.o\\_orderstatus,o.o\\_orderkey] [ps.ps\\_supplycost,ps.ps\\_partkey] [o.o\\_orderkey,o.o\\_orderdate] &                                                                                                                                                               [l.l\\_orderkey,l.l\\_quantity] [l.l\\_orderkey,l.l\\_returnflag] \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                               [l.l\\_suppkey,l.l\\_orderkey] \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      96.12\\% / 31.00\\% &                                          69.07\\% / 20.34\\% &                                                                                                                                                                                               96.12\\% / 31.00\\% &                                                                                                                                                                                                          96.12\\% / 31.00\\% \\\\\n",
      "             10 &                   New Indexes &  [part.p\\_partkey] [part.p\\_brand] [o.o\\_orderkey] [c.c\\_custkey] [o.o\\_custkey] [o.o\\_orderstatus] [ps.ps\\_suppkey] [s.s\\_suppkey] [l.l\\_returnflag] &                              [l.l\\_orderkey,l.l\\_quantity] &                                                                                                                                                                                                [l.l\\_orderkey] &                                                                                                                                                                                         [ps.ps\\_partkey,ps.ps\\_supplycost] \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                                                                                        - \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      71.54\\% / 22.18\\% &                                          95.54\\% / 19.43\\% &                                                                                                                                                                                               71.54\\% / 22.18\\% &                                                                                                                                                                                                          71.54\\% / 22.18\\% \\\\\n",
      "           12.8 &                   New Indexes &                                                                                  [ps.ps\\_supplycost] [part.p\\_size] [l.l\\_shipdate] [c.c\\_mktsegment] &                                           [l.l\\_shipmode] &                                                                                                                               [o.o\\_orderkey,o.o\\_orderpriority] [l.l\\_returnflag] [o.o\\_custkey,o.o\\_orderdate] &  [part.p\\_partkey,part.p\\_type] [o.o\\_custkey,o.o\\_orderkey] [o.o\\_orderkey,o.o\\_orderdate] [c.c\\_custkey] [ps.ps\\_suppkey] [l.l\\_shipmode] [s.s\\_suppkey] [l.l\\_orderkey] [l.l\\_returnflag] [o.o\\_orderstatus,o.o\\_orderkey] \\\\\n",
      "                &               Removed Indexes &                                                                                                                                                    - &                                                        - &                                                                                                                                                                                                             - &                                                                                                                                                             [ps.ps\\_suppkey,ps.ps\\_partkey] [l.l\\_orderkey,l.l\\_returnflag] \\\\\n",
      "                &  \\% of previous / initial cost &                                                                                                                                      89.75\\% / 19.90\\% &                                          93.17\\% / 18.11\\% &                                                                                                                                                                                               89.75\\% / 19.90\\% &                                                                                                                                                                                                          89.75\\% / 19.90\\% \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(df_table.to_latex(index=False,escape=True).replace('\\\\textbackslash n', ' '))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
